# This file includes statistical analyses and code used
# to create all Figures that require calculation and
# appear in the Main Manuscript (Figures 4, 6, 7, and 8).

# The code structure is designed to run sequentially. Functions and
# Datasets created to generate earlier figures are drawn upon in later figures
# and analyses.

# The code should be run sequentially after coding files 1-2

############################################################################
# FIGURE 4: Mean Severity Rankings in 80 focus groups
############################################################################

#general functions
stderr <- function(x, na.rm=FALSE) {
  if (na.rm) x <- na.omit(x)
  sqrt(var(x)/length(x))
}

summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=TRUE,
                      .drop=TRUE) {
  library(plyr)
  # New version of length which can handle NA's: if na.rm==T, don't count them
  length2 <- function (x, na.rm=FALSE) {
    if (na.rm) sum(!is.na(x))
    else       length(x)
  }
  # This does the summary. For each group's data frame, return a vector with
  # N, mean, and sd
  datac <- ddply(data, groupvars, .drop=.drop,
                 .fun = function(xx, col) {
                   c(N    = length2(xx[[col]], na.rm=na.rm),
                     min = min   (xx[[col]], na.rm=na.rm),
                     max = max   (xx[[col]], na.rm=na.rm),
                     mean = round(mean   (xx[[col]], na.rm=na.rm), 3),
                     sd   = round(sd     (xx[[col]], na.rm=na.rm), 3),
                     se   = round(stderr     (xx[[col]], na.rm=na.rm), 3)
                   )
                 },
                 measurevar
  )
  return(datac)
}

#label focus groups as male or female by number
D2$Gender <- NA
D2$Gender[D2$FGID==1 | D2$FGID==2] <- "Women"
D2$Gender[D2$FGID==3 | D2$FGID==4] <- "Men"

#apply summary SE function to summarize data
SEP1 <- data.frame(NA, summarySE(data=D2, measurevar="P1", groupvars=c("Gender"), na.rm=TRUE, .drop=TRUE))
SEP2 <- data.frame(NA, summarySE(data=D2, measurevar="P2", groupvars=c("Gender"), na.rm=TRUE, .drop=TRUE))
SEP3 <- data.frame(NA, summarySE(data=D2, measurevar="P3", groupvars=c("Gender"), na.rm=TRUE, .drop=TRUE))
SEP4 <- data.frame(NA, summarySE(data=D2, measurevar="P4", groupvars=c("Gender"), na.rm=TRUE, .drop=TRUE))
SEP5 <- data.frame(NA, summarySE(data=D2, measurevar="P5", groupvars=c("Gender"), na.rm=TRUE, .drop=TRUE))

SEP1[,1] <- "Counsel"
SEP2[,1] <- "Payment"
SEP3[,1] <- "Expulsion"
SEP4[,1] <- "Prison"
SEP5[,1] <- "Beating"


SEP <- rbind(SEP1, SEP2, SEP3, SEP4, SEP5)
colnames(SEP)[1] <- "Punishment"
SEP$Punishment <- factor(SEP$Punishment,levels = c("Counsel","Payment", "Expulsion", "Prison", "Beating"))
SEP

#generate figure
ggplot(SEP, aes(x=Punishment, y=mean, colour=Gender)) + 
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.3, position=position_dodge()) +
  scale_colour_manual(values=c('#fba210','#2c3Ab8')) + 
  ylab("Mean Severity Ranking") +
  theme_bw()

############################################################################
# FIGURE 6: Preferences by Group Heterogeneity
############################################################################

#I create new dataset where I stack the data such that I count each individual 4 times
#include a factor variable indicating which outcome it represents
#significance means difference from base category (private attitude) in terms of predicting outcomes

#unique identifier for analyses (fixed individual level effects)
D3$ROWID <- rownames(D3)

#Generate Crime specific datasets with a variable OUTTYPE.XXX for each preference type
D3.VIOL <- reshape(data=D3, varying = list(c("PRIV1.RC2.VIOL", "PUB1.RC2.VIOL", "GCHOIX.RC2.VIOL", "PRIV2.RC2.VIOL")), timevar = c("OUTTYPE.VIOL"), idvar = "ROWID", direction="long", v.names = "ALLOUT.VIOL")

D3.DV <- reshape(data=D3, varying = list(c("PRIV1.RC2.DV", "PUB1.RC2.DV", "GCHOIX.RC2.DV", "PRIV2.RC2.DV")), timevar = c("OUTTYPE.DV"), idvar = "ROWID", direction="long", v.names = "ALLOUT.DV")

D3.VOL <- reshape(data=D3, varying = list(c("PRIV1.RC2.VOL", "PUB1.RC2.VOL", "GCHOIX.RC2.VOL", "PRIV2.RC2.VOL")),  timevar = c("OUTTYPE.VOL"), idvar = "ROWID", direction="long", v.names = "ALLOUT.VOL")

#CREATE FACTOR VARIABLE TO GET LABELS
D3.VIOL$OUTTYPE.FACTOR <- factor(D3.VIOL$OUTTYPE.VIOL, levels=c(1, 2, 3, 4), labels=c("Private","Public","Group", "Post-FG Private"))
D3.DV$OUTTYPE.FACTOR <- factor(D3.DV$OUTTYPE.DV, levels=c(1, 2, 3, 4), labels=c("Private","Public","Group", "Post-FG Private"))
D3.VOL$OUTTYPE.FACTOR <- factor(D3.VOL$OUTTYPE.VOL, levels=c(1, 2, 3, 4), labels=c("Private","Public","Group", "Post-FG Private"))

D3.VIOL$FEMALE.FACTOR <- factor(D3.VIOL$FEMALE, levels=c(0,1), labels=c("MALE","FEMALE"))
D3.DV$FEMALE.FACTOR <- factor(D3.DV$FEMALE, levels=c(0,1), labels=c("MALE","FEMALE"))
D3.VOL$FEMALE.FACTOR <- factor(D3.VOL$FEMALE, levels=c(0,1), labels=c("MALE","FEMALE"))

# use summary function to calculate summary statistics disaggregated by subgroup type (heterogeneous, homogeneous high and low status)
VIOLMEANS.H <- summarySE(D3.VIOL, measurevar="ALLOUT.VIOL", groupvars=c("HHL.FAC", "OUTTYPE.FACTOR"), na.rm=TRUE)
DVMEANS.H <- summarySE(D3.DV, measurevar="ALLOUT.DV", groupvars=c("HHL.FAC", "OUTTYPE.FACTOR"), na.rm=TRUE)
VOLMEANS.H <- summarySE(D3.VOL, measurevar="ALLOUT.VOL", groupvars=c("HHL.FAC","OUTTYPE.FACTOR"), na.rm=TRUE)

# get a measure for ALL together; and paste alongside group-type disaggregated summary statistics
VIOLMEANS2.H <- cbind("", summarySE(D3.VIOL, measurevar="ALLOUT.VIOL", groupvars=c("OUTTYPE.FACTOR"), na.rm=TRUE))
names(VIOLMEANS2.H)[1] <- "HHL.FAC"
VIOLMEANS2.H[,1] <- "ALL"
DVMEANS2.H <- cbind("", summarySE(D3.DV, measurevar="ALLOUT.DV", groupvars=c("OUTTYPE.FACTOR"), na.rm=TRUE))
names(DVMEANS2.H)[1] <- "HHL.FAC"
DVMEANS2.H[,1] <- "ALL"
VOLMEANS2.H <- cbind("", summarySE(D3.VOL, measurevar="ALLOUT.VOL", groupvars=c("OUTTYPE.FACTOR"), na.rm=TRUE))
names(VOLMEANS2.H)[1] <- "HHL.FAC"
VOLMEANS2.H[,1] <- "ALL"

#combine into data frame
VIOLMEANS3.H <- data.frame(rbind(VIOLMEANS.H, VIOLMEANS2.H))
DVMEANS3.H <- rbind(DVMEANS.H, DVMEANS2.H)
VOLMEANS3.H <- rbind(VOLMEANS.H, VOLMEANS2.H)

# generate three plots, one for each crime type

#rape
MP1.H <- ggplot(VIOLMEANS3.H, aes(x=OUTTYPE.FACTOR, y=mean, colour=HHL.FAC, shape=VIOLMEANS3.H$HHL.FAC)) +
  geom_errorbar(aes(ymin=VIOLMEANS3.H$mean-1.96*VIOLMEANS3.H$se, ymax=VIOLMEANS3.H$mean+1.96*VIOLMEANS3.H$se), width=.1, position=position_dodge(0.4)) +
  geom_line(position=position_dodge(0.4)) +
  geom_point(position=position_dodge(0.4))+
  labs(title="Rape",x="", y = "Response") +
  scale_y_continuous(limits=c(1,4)) +
  scale_colour_manual(values=c('#fba210', '#10dffb', '#2c3Ab8', '#000099')) + 
  theme_bw()+
  theme(legend.title=element_blank(), axis.text.x = element_text(angle = 35, hjust = 1))

#wife-beating
MP2.H <- ggplot(DVMEANS3.H, aes(x=OUTTYPE.FACTOR, y=mean, colour=HHL.FAC, shape=DVMEANS3.H$HHL.FAC)) +
  geom_errorbar(aes(ymin=DVMEANS3.H$mean-1.96*DVMEANS3.H$se, ymax=DVMEANS3.H$mean+1.96*DVMEANS3.H$se), width=.1, position=position_dodge(0.4)) +
  geom_line(position=position_dodge(0.4)) +
  geom_point(position=position_dodge(0.4))+
  labs(title="Wife-beating",x="", y = "Response") +
  scale_y_continuous(limits=c(1,4))+
  scale_colour_manual(values=c('#fba210', '#10dffb', '#2c3Ab8', '#000099')) + 
  theme_bw()+
  theme(legend.title=element_blank(), axis.text.x = element_text(angle = 35, hjust = 1))

#theft
MP3.H <-ggplot(VOLMEANS3.H, aes(x=OUTTYPE.FACTOR, y=mean, colour=HHL.FAC, shape=VOLMEANS3.H$HHL.FAC)) +
  geom_errorbar(aes(ymin=VOLMEANS3.H$mean-1.96*VOLMEANS3.H$se, ymax=VOLMEANS3.H$mean+1.96*VOLMEANS3.H$se), width=.1, position=position_dodge(0.4)) +
  geom_line(position=position_dodge(0.4)) +
  geom_point(position=position_dodge(0.4))+
  labs(title="Theft",x="", y = "Response") +
  scale_y_continuous(limits=c(1,4))+
  scale_colour_manual(values=c('#fba210', '#10dffb', '#2c3Ab8', '#000099')) +  
  theme_bw()+
  theme(legend.title=element_blank(), axis.text.x = element_text(angle = 35, hjust = 1))

#legend and formatting
g_legend<-function(a.gplot){
  tmp <- ggplot_gtable(ggplot_build(a.gplot))
  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
  legend <- tmp$grobs[[leg]]
  return(legend)}
lay <- rbind(c(1,1,2,2,3,3,4),
             c(1,1,2,2,3,3,4))
mylegend<-g_legend(MP3.H)

#output figure
grid.arrange(MP1.H + theme(legend.position="none"), 
             MP2.H+ theme(legend.position="none"), 
             MP3.H+ theme(legend.position="none"), 
             mylegend,
             layout_matrix = lay,
             nrow=2, ncol=7)


############################################################################
# FIGURE 7: Men's and Women's preferences
############################################################################

#using same D3.VIOL, D3.DV and D3.VOL datasets generated with Figure 6

# use summary function to calculate summary statistics disaggregated by gender
VIOLMEANS <- summarySE(D3.VIOL, measurevar="ALLOUT.VIOL", groupvars=c("FEMALE.FACTOR", "OUTTYPE.FACTOR"), na.rm=TRUE)
DVMEANS <- summarySE(D3.DV, measurevar="ALLOUT.DV", groupvars=c("FEMALE.FACTOR", "OUTTYPE.FACTOR"), na.rm=TRUE)
VOLMEANS <- summarySE(D3.VOL, measurevar="ALLOUT.VOL", groupvars=c("FEMALE.FACTOR","OUTTYPE.FACTOR"), na.rm=TRUE)

# get a measure for ALL together; and paste alongside gender disaggregated summary statistics
VIOLMEANS2 <- cbind("", summarySE(D3.VIOL, measurevar="ALLOUT.VIOL", groupvars=c("OUTTYPE.FACTOR"), na.rm=TRUE))
names(VIOLMEANS2)[1] <- "FEMALE.FACTOR"
VIOLMEANS2[,1] <- "ALL"
DVMEANS2 <- cbind("", summarySE(D3.DV, measurevar="ALLOUT.DV", groupvars=c("OUTTYPE.FACTOR"), na.rm=TRUE))
names(DVMEANS2)[1] <- "FEMALE.FACTOR"
DVMEANS2[,1] <- "ALL"
VOLMEANS2 <- cbind("", summarySE(D3.VOL, measurevar="ALLOUT.VOL", groupvars=c("OUTTYPE.FACTOR"), na.rm=TRUE))
names(VOLMEANS2)[1] <- "FEMALE.FACTOR"
VOLMEANS2[,1] <- "ALL"

# create data frame
VIOLMEANS3 <- data.frame(rbind(VIOLMEANS, VIOLMEANS2))
DVMEANS3 <- rbind(DVMEANS, DVMEANS2)
VOLMEANS3 <- rbind(VOLMEANS, VOLMEANS2)

# generate a plot for each crime type
#rape
MP1 <- ggplot(VIOLMEANS3, aes(x=OUTTYPE.FACTOR, y=mean, colour=FEMALE.FACTOR, shape=VIOLMEANS3$FEMALE.FACTOR)) +
  geom_errorbar(aes(ymin=VIOLMEANS3$mean-1.96*VIOLMEANS3$se, ymax=VIOLMEANS3$mean+1.96*VIOLMEANS3$se), width=.1, position=position_dodge(0.4)) +
  geom_line(position=position_dodge(0.4)) +
  geom_point(position=position_dodge(0.4))+
  labs(title="Rape",x="", y = "Response") +
  scale_y_continuous(limits=c(1,4)) +
  scale_colour_manual(values=c('#fba210', '#10dffb', '#2c3Ab8')) + 
  theme_bw()+
  theme(legend.title=element_blank(), axis.text.x = element_text(angle = 35, hjust = 1))

#wife-beating
MP2 <- ggplot(DVMEANS3, aes(x=OUTTYPE.FACTOR, y=mean, colour=FEMALE.FACTOR, shape=DVMEANS3$FEMALE.FACTOR)) +
  geom_errorbar(aes(ymin=DVMEANS3$mean-1.96*DVMEANS3$se, ymax=DVMEANS3$mean+1.96*DVMEANS3$se), width=.1, position=position_dodge(0.4)) +
  geom_line(position=position_dodge(0.4)) +
  geom_point(position=position_dodge(0.4))+
  labs(title="Wife-beating",x="", y = "Response") +
  scale_y_continuous(limits=c(1,4))+
  scale_colour_manual(values=c('#fba210', '#10dffb', '#2c3Ab8')) + 
  theme_bw()+
  theme(legend.title=element_blank(), axis.text.x = element_text(angle = 35, hjust = 1))

#theft
MP3 <-ggplot(VOLMEANS3, aes(x=OUTTYPE.FACTOR, y=mean, colour=FEMALE.FACTOR, shape=VOLMEANS3$FEMALE.FACTOR)) +
  geom_errorbar(aes(ymin=VOLMEANS3$mean-1.96*VOLMEANS3$se, ymax=VOLMEANS3$mean+1.96*VOLMEANS3$se), width=.1, position=position_dodge(0.4)) +
  geom_line(position=position_dodge(0.4)) +
  geom_point(position=position_dodge(0.4))+
  labs(title="Theft",x="", y = "Response") +
  scale_y_continuous(limits=c(1,4))+
  scale_colour_manual(values=c('#fba210', '#10dffb', '#2c3Ab8')) +  
  theme_bw()+
  theme(legend.title=element_blank(), axis.text.x = element_text(angle = 35, hjust = 1))


#legend and figure formatting
g_legend<-function(a.gplot){
  tmp <- ggplot_gtable(ggplot_build(a.gplot))
  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
  legend <- tmp$grobs[[leg]]
  return(legend)}
lay <- rbind(c(1,1,2,2,3,3,4),
             c(1,1,2,2,3,3,4))
mylegend<-g_legend(MP3)

#output figure
grid.arrange(MP1 + theme(legend.position="none"), 
             MP2+ theme(legend.position="none"), 
             MP3+ theme(legend.position="none"), 
             mylegend,
             layout_matrix = lay,
             nrow=2, ncol=7)

############################################################################
# FIGURE 8: Men's and Women's Preference Convergence
############################################################################

#I create new dataset where I stack the data such that I count each individual 4 times
#include a factor variable indicating which outcome it represents
#significance means difference from base category (private attitude) in terms of predicting outcomes
# This dataset focuses on the convergence variable rather than the preference variables


#unique identifier for each respondent
D3$ROWID2 <- rownames(D3)

#Generate Crime specific datasets with a variable OUTTYPE.XXX for each preference type
D3DIST.VIOL <- reshape(data=D3, varying = list(c("FGMEAN.PRIV1.VIOL", "FGMEAN.PRIV2.VIOL")), timevar = c("OUTTYPE.VIOL"), idvar = "ROWID2", direction="long", v.names = "ALLDIST.VIOL")

D3DIST.DV <- reshape(data=D3, varying = list(c("FGMEAN.PRIV1.DV", "FGMEAN.PRIV2.DV")), timevar = c("OUTTYPE.DV"), idvar = "ROWID2", direction="long", v.names = "ALLDIST.DV")

D3DIST.VOL <- reshape(data=D3, varying = list(c("FGMEAN.PRIV1.VOL", "FGMEAN.PRIV2.VOL")),  timevar = c("OUTTYPE.VOL"), idvar = "ROWID2", direction="long", v.names = "ALLDIST.VOL")


# #CREATE FACTOR VARIABLE TO GET LABELS
D3DIST.VIOL$OUTTYPE.FACTOR <- factor(D3DIST.VIOL$OUTTYPE.VIOL, levels=c(1, 2), labels=c("Private","Post-FG Private"))
D3DIST.DV$OUTTYPE.FACTOR <- factor(D3DIST.DV$OUTTYPE.DV, levels=c(1, 2), labels=c("Private","Post-FG Private"))
D3DIST.VOL$OUTTYPE.FACTOR <- factor(D3DIST.VOL$OUTTYPE.VOL, levels=c(1, 2), labels=c("Private","Post-FG Private"))

D3DIST.VIOL$FEMALE.FACTOR <- factor(D3DIST.VIOL$FEMALE, levels=c(0,1), labels=c("MALE","FEMALE"))
D3DIST.DV$FEMALE.FACTOR <- factor(D3DIST.DV$FEMALE, levels=c(0,1), labels=c("MALE","FEMALE"))
D3DIST.VOL$FEMALE.FACTOR <- factor(D3DIST.VOL$FEMALE, levels=c(0,1), labels=c("MALE","FEMALE"))

# use summary function to calculate summary statistics for convergence variables disaggregated by gender
VIOLMEANS.DIST <- summarySE(D3DIST.VIOL, measurevar="ALLDIST.VIOL", groupvars=c("FEMALE.FACTOR", "OUTTYPE.FACTOR"), na.rm=TRUE)
DVMEANS.DIST <- summarySE(D3DIST.DV, measurevar="ALLDIST.DV", groupvars=c("FEMALE.FACTOR", "OUTTYPE.FACTOR"), na.rm=TRUE)
VOLMEANS.DIST <- summarySE(D3DIST.VOL, measurevar="ALLDIST.VOL", groupvars=c("FEMALE.FACTOR","OUTTYPE.FACTOR"), na.rm=TRUE)

# get a measure for ALL together; and paste alongside gender disaggregated summary statistics
VIOLMEANS2.DIST <- cbind("", summarySE(D3DIST.VIOL, measurevar="ALLDIST.VIOL", groupvars=c("OUTTYPE.FACTOR"), na.rm=TRUE))
names(VIOLMEANS2.DIST)[1] <- "FEMALE.FACTOR"
VIOLMEANS2.DIST[,1] <- "ALL"
DVMEANS2.DIST <- cbind("", summarySE(D3DIST.DV, measurevar="ALLDIST.DV", groupvars=c("OUTTYPE.FACTOR"), na.rm=TRUE))
names(DVMEANS2.DIST)[1] <- "FEMALE.FACTOR"
DVMEANS2.DIST[,1] <- "ALL"
VOLMEANS2.DIST <- cbind("", summarySE(D3DIST.VOL, measurevar="ALLDIST.VOL", groupvars=c("OUTTYPE.FACTOR"), na.rm=TRUE))
names(VOLMEANS2.DIST)[1] <- "FEMALE.FACTOR"
VOLMEANS2.DIST[,1] <- "ALL"

# create data frame specific to convergence
VIOLMEANS3.DIST <- data.frame(rbind(VIOLMEANS.DIST, VIOLMEANS2.DIST))
DVMEANS3.DIST <- rbind(DVMEANS.DIST, DVMEANS2.DIST)
VOLMEANS3.DIST <- rbind(VOLMEANS.DIST, VOLMEANS2.DIST)

# generate a plot for each crime type
#rape
MP1.DIST <- ggplot(VIOLMEANS3.DIST, aes(x=OUTTYPE.FACTOR, y=mean, colour=FEMALE.FACTOR, shape=VIOLMEANS3.DIST$FEMALE.FACTOR)) +
  geom_errorbar(aes(ymin=VIOLMEANS3.DIST$mean-1.96*VIOLMEANS3.DIST$se, ymax=VIOLMEANS3.DIST$mean+1.96*VIOLMEANS3.DIST$se), width=.1, position=position_dodge(0.4)) +
  geom_line(position=position_dodge(0.4)) +
  geom_point(position=position_dodge(0.4))+
  labs(title="Rape",x="", y = "Response") +
  scale_y_continuous(limits=c(0,1)) +
  scale_colour_manual(values=c('#fba210', '#10dffb', '#2c3Ab8')) + 
  theme_bw()+
  theme(legend.title=element_blank(), axis.text.x = element_text(angle = 35, hjust = 1))

#wife-beating
MP2.DIST <- ggplot(DVMEANS3.DIST, aes(x=OUTTYPE.FACTOR, y=mean, colour=FEMALE.FACTOR, shape=DVMEANS3.DIST$FEMALE.FACTOR)) +
  geom_errorbar(aes(ymin=DVMEANS3.DIST$mean-1.96*DVMEANS3.DIST$se, ymax=DVMEANS3.DIST$mean+1.96*DVMEANS3.DIST$se), width=.1, position=position_dodge(0.4)) +
  geom_line(position=position_dodge(0.4)) +
  geom_point(position=position_dodge(0.4))+
  labs(title="Wife-beating",x="", y = "Response") +
  scale_y_continuous(limits=c(0,1))+
  scale_colour_manual(values=c('#fba210', '#10dffb', '#2c3Ab8')) + 
  theme_bw()+
  theme(legend.title=element_blank(), axis.text.x = element_text(angle = 35, hjust = 1))

#theft
MP3.DIST <-ggplot(VOLMEANS3.DIST, aes(x=OUTTYPE.FACTOR, y=mean, colour=FEMALE.FACTOR, shape=VOLMEANS3.DIST$FEMALE.FACTOR)) +
  geom_errorbar(aes(ymin=VOLMEANS3.DIST$mean-1.96*VOLMEANS3.DIST$se, ymax=VOLMEANS3.DIST$mean+1.96*VOLMEANS3.DIST$se), width=.1, position=position_dodge(0.4)) +
  geom_line(position=position_dodge(0.4)) +
  geom_point(position=position_dodge(0.4))+
  labs(title="Theft",x="", y = "Response") +
  scale_y_continuous(limits=c(0,1))+
  scale_colour_manual(values=c('#fba210', '#10dffb', '#2c3Ab8')) + 
  theme_bw()+
  theme(legend.title=element_blank(), axis.text.x = element_text(angle = 35, hjust = 1))

#format figure
lay.DIST <- rbind(c(1,1,2,2,3,3,4),
                  c(1,1,2,2,3,3,4))
mylegend.DIST<-g_legend(MP3.DIST)

#output figure
grid.arrange(MP1.DIST + theme(legend.position="none"), 
             MP2.DIST+ theme(legend.position="none"), 
             MP3.DIST+ theme(legend.position="none"), 
             mylegend.DIST,
             layout_matrix = lay.DIST,
             nrow=2, ncol=7)
